
/*-----------------------------------------------*
Name: Haqdarshak Pilot Intervention-Cleaning
Date Created: 30 July, 2018
Date Last Modified: 28 April, 2021
Created by: Aaron Berman and modified by Daniela Paz and Saumya Mathur on Nov.13/2020 
Description: Clean raw data to create a screening count intermediate file
*-------------------------------------------------*/



*..................................................
** Make sure the user-written commands listed below are available:
** anything -which- cannot locate (return code 111) is fetched from SSC.
foreach pkg in mdesc nmissing veracrypt {
	capture which `pkg'
	if (_rc == 111) {
		ssc install `pkg'
	}
}
*..................................................
** Session setup: empty memory, no -more- prompts, run under version 12.0,
** close any log that is still open, and switch -pause- off.
clear all
set more off
version 12.0
capture log close
pause off



//open log 
*cd "$logs"
*log using "cleaning_screening_takeup.smcl", replace


********CODE INDIVIDUAL ELIGIBILITY

		***ADDED OCTOBER 16: CODE INDIVIDUAL ELIGIBILITY***
	** household-level attributes used by the eligibility rules, one row per household
	use household_id annual_income housekind_kuccha housekindobserve_kuccha enter_annual_income ///
		using "$pilot/hh_data_treatment_b", clear

	tempfile hh_attributes
	save "`hh_attributes'", replace

	** member-level records: attach the household attributes by household_id and
	** stop with an error unless every observation is matched on both sides
	use "$pilot/individual_data_treatment_b", clear
	merge m:1 household_id using "`hh_attributes'", assert(match) nogenerate

	** drop rows that have neither an age nor a gender recorded
	** (presumably unused member slots -- confirm against the survey form)
	drop if missing(enter_years_) & missing(member_gender_)


	** 3g age
	rename enter_years_ age
	label var age "Age"

	** check age of first respondent: must be non-missing and at least 18
	tab age if member_num == "1", m
	assert age >= 18 & !missing(age) if member_num == "1"
	sum age if member_num == "1", d

	** 3h gender: code 1 becomes male = 1, code 2 becomes male = 0,
	** any other value is left missing
	gen male = 1 if member_gender_ == 1
	replace male = 0 if member_gender_ == 2
	tab male, m
	label var male "Male"

	** 3i literacy
	** A missing read_write_ answer stays missing instead of being coded as 0,
	** so the tabulation below shows how many answers are actually missing
	gen literate = (read_write_ == 1) if !missing(read_write_)
	tab literate, m
	label var literate "Can read or write"

	** 3k enrolled in school
	rename enrolled_in_school_ enrolled_in_school
	tab enrolled_in_school, m
	label var enrolled_in_school "Currently enrolled in school or training"

	** 3l marital status
	rename member_marital_status_ marital_status

	** 3m occupation
	rename occupation_ occupation

	** 3n income in last month

	** 3o hours worked last week

	** 3p have documents

	** 3q bank account

	** 3r disabled
	rename disabled_ disabled
	tab disabled, m
	label var disabled "Disabled"

	** 3s disability percentage

	** 3t adopted orphan
	rename adopted_orphan_ adopted_orphan

	** 3u mother separated/divorced/widowed/remarried widow
	rename mother_widowed_ mother_widowed

	** 3v mother or father disabled
	rename mother_father_disabled_ mother_father_disabled


	****************
	** individual-level 0/1 eligibility flags for the top 8 schemes in Rajasthan
	****************

	** gender must be known for every remaining person; the matching check on
	** age is left switched off
	assert !missing(male)
	*assert !missing(age)

	** 1. sukanya: male == 0 and age 10 or below
	gen elig_sukanya = (male == 0 & age <= 10 & !missing(age))
	tab elig_sukanya, m

	** 2. post office: everyone (Flavvy said the age >= 18 condition no longer applies)
	gen elig_postoffice = 1
	tab elig_postoffice, m

	** 3. labor card: age 18 through 60 and occupation code 26 through 37
	gen elig_laborcard = (inrange(age, 18, 60) & ///
		inlist(occupation, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37))
	tab elig_laborcard, m

	** 4. awas yojana (originally marked "not done"): age 18 or above and housekind_kuccha == 1
	gen elig_awas_yojana = (age >= 18 & !missing(age) & housekind_kuccha == 1)
	tab elig_awas_yojana, m

	** 5. old-age pension: annual income below 48000 and
	**    age 58 or above when male == 1, age 55 or above when male == 0
	gen elig_oldage_pension = (enter_annual_income < 48000 & !missing(age) & ///
		((male == 1 & age >= 58) | (male == 0 & age >= 55)))
	tab elig_oldage_pension, m

	** 6. palanhaar: left missing for everyone
	**    (this was too sensitive, so we did not ask these questions)
	gen elig_palanhaar = .
	tab elig_palanhaar, m

	** 7. widow pension: male == 0, age 18 or above, annual income below 48000,
	**    and marital_status equal to 3, 4, 5 or 6
	gen elig_widow_pension = (male == 0 & age >= 18 & !missing(age) & ///
		enter_annual_income < 48000 & inlist(marital_status, 3, 4, 5, 6))
	tab elig_widow_pension, m

	** 8. atal pension: age 18 through 40
	gen elig_atalpension = inrange(age, 18, 40)
	tab elig_atalpension, m

	//Household-level eligibility variables.
	//Labor card is removed first since not one of top schemes; the remaining
	//flags are summed within household.
	drop elig_laborcard
	collapse (sum) elig_*, by(household_id)

	//Turn each household sum into a 0/1 indicator (1 = nonzero sum).
	//NOTE(review): collapse (sum) returns 0 when every input is missing, so
	//elig_palanhaar (generated as missing for everyone) presumably comes out
	//as 0 here rather than missing -- confirm that this is intended.
	foreach flag of varlist elig_* {
		replace `flag' = (`flag' != 0) if !missing(`flag')
	}

	//store household_id as a string and keep the result in a tempfile
	tostring household_id, replace
	tempfile hh_elig_dummies
	save `hh_elig_dummies', replace
	
	
********CODE HOUSEHOLD MEMBERS COUNT

	//Build one row per household holding the number of member records,
	//saved as "household_member_counts" in the intermediate data folder.
	cd "$pilot"
	use "individual_data_treatment_b", clear

	count
	//only keep non-missing member observations: a row is dropped only when BOTH
	//age and gender are missing. The gender term used to be written as
	//(member_gender_), which Stata evaluates as "member_gender_ != 0", so every
	//row with a missing age was dropped whether or not gender was recorded.
	//NOTE(review): the eligibility section filters on enter_years_ rather than
	//member_age_ -- confirm which age variable is intended here.
	drop if missing(member_age_) & missing(member_gender_)
	count

	//member_num is converted to numeric before being counted
	destring member_num, replace

	count

	//number of non-missing member_num values per household
	collapse (count) member_num, by(household_id)

	count
	//after the collapse household_id should be unique; this reports any surplus copies
	duplicates report household_id

	ren member_num num_members
	tab num_members

	cd "$intermediate_data"
	save "household_member_counts", replace
	


********CODE SCREENING TAKE-UP

	
	//Tracking sheets combined with the household roster, used to determine
	//which households had disabled people screened
	cd "$pilot"
	use "pilot_intervention_tracking_sheets_all", clear

	//attach roster records by member id; only matched observations are kept
	merge m:1 member_id using "pilot_baseline_household_roster_all.dta", keep(match) nogenerate

	//sum -disabled- within household, then reduce the sum to a 0/1 indicator
	collapse (sum) num_disabled_screened=disabled, by(household_id)
	gen hh_disabled_screened = !missing(num_disabled_screened) & num_disabled_screened > 0
	drop num_disabled_screened

	//store household_id as a string and keep the result in a tempfile
	tostring household_id, replace
	tempfile hh_disabled
	save `hh_disabled', replace

	//Household roster: one row per household with the count of member ids and
	//the household sums of -child- and -disabled-
	cd "$pilot"
	use "pilot_baseline_household_roster_all", clear

	collapse ///
		(count) num_members=member_id ///
		(sum) hh_num_child=child hh_num_disabled=disabled, ///
		by(household_id)

	//store household_id as a string and keep the result in a tempfile
	tostring household_id, replace
	tempfile household_member_counts
	save `household_member_counts', replace

	//village and stratum designations, with household_id stored as a string
	use household_id village_id stratum using "$pilot/hh_data_treatment_b", clear
	tostring household_id, replace
	tempfile village_strata
	save `village_strata', replace

	//other household-level variables, built from the full household file
	use "$pilot/hh_data_treatment_b", clear

	//disability: row total over the disabled_ variables, then a 0/1 indicator
	//for a total above zero
	egen num_disabled = rowtotal(disabled_*)
	gen any_disability_hh = num_disabled > 0 if !missing(num_disabled)

	//1 when member_gender_1 equals 1, otherwise 0 (a missing value also gives 0)
	gen member_male_1 = member_gender_1 == 1

	//economic disruptions: row total over the variables stored from
	//E_death_head_of_hh through E_dropinincome_cropfailure
	egen num_disruptions = rowtotal(E_death_head_of_hh-E_dropinincome_cropfailure)

	keep household_id any_disability_hh enter_annual_income above_48000 caste* ///
		applied_for_govt_scheme member_male_1 num_disruptions administer_module10
	tostring household_id, replace
	tempfile baseline_characteristics
	save `baseline_characteristics', replace


	//Start from the household-level screening counts and attach every
	//household-level piece by household_id (stored as a string throughout)
	use "$pilot/pilot_intervention_household_screening_counts", clear
	tostring household_id, replace

	//For these three tempfiles only matched households are kept:
	//  master-only means erroneous or missing HHID
	//  using-only means baseline HH has not yet been screened
	foreach piece in household_member_counts baseline_characteristics village_strata {
		merge 1:1 household_id using ``piece'', keep(match) nogenerate
	}

	//Disabled-screened flag from the tracking sheets:
	//  master-only means HHID not in tracking sheets, so the flag is set to 0
	//  using-only indicates discrepancy between app and tracking sheets
	//NOTE(review): using-only households are not dropped at this step -- confirm intended
	merge 1:1 household_id using `hh_disabled'
	replace hh_disabled_screened = 0 if _merge == 1
	drop _merge

	//household eligibility dummies; matched households only
	merge 1:1 household_id using `hh_elig_dummies', keep(match) nogenerate

	//generate treatment groups
	//Each dummy is left missing when discount is missing, so a household with
	//no recorded discount is not coded 0 on all three dummies (which would place
	//it in none of the groups).
	gen treat_100 = (discount == 100) if !missing(discount)
	gen treat_50 = (discount == 50) if !missing(discount)
	gen treat_0 = (discount == 0) if !missing(discount)

	//generate certain outcomes
	//shares are taken over num_members
	gen pct_offered = num_offered / num_members
	gen pct_takeup = num_takeup / num_members
	//_merge_fam and any_takeup are not created in this file; presumably they
	//come with the screening counts data -- confirm
	replace pct_takeup = 0 if _merge_fam == 2 & any_takeup == 0
	replace num_takeup = 0 if _merge_fam == 2 & any_takeup == 0 //added 9/25
	//1 when num_takeup is above 1; missing when num_takeup is missing
	gen morethan1_takeup = (num_takeup > 1) if !missing(num_takeup)


	//generate dummies for village and stratum fixed effects
	tab village_id, gen(d_village_)
	tab stratum, gen(d_stratum_)

	save "$intermediate_data/screening_takeup", replace
	
	
	
